library(data.table)
library(ggplot2)
library(lfe)
library(stringr)
library(scales)
library(Hmisc)
library(readstata13)





##Y variable : approval rate X variable: loan_amount/conforming_limit 
full <- fread('../data/all_records07-17wcfm.csv') # ENTIRE HMDA
hmda <- full[loan_type == 1 & loan_purpose %in% c(1,3) & property_type == 1 & owner_occupancy == 1 & lien_status == 1]
hmda[TYPE %in% c(40,41) ,type := 'S']
hmda[TYPE %in% 10:14    ,type := 'B']
hmda<-hmda[type %in% c('B','S')]
hmda[,BANK := as.integer(type == 'B')]

hmda[action_taken %in% c(3,7), approval := 0]
hmda[action_taken %in% c(1,2), approval := 1]
hmda <- hmda[!is.na(hmda$approval),]

hmda[, conf.pct := loan_amount / conforming_limit]
hmda[, jumbo := as.integer(conf.pct>1.005)]

hmda[,conf.bucket := cut(conf.pct,breaks = c(0,seq(.5+.01,1.5+.01,by = 0.05)),labels = c(seq(.5,1.5,by = 0.05)))]

byBin <- hmda[year %in% 2007:2017 & conf.pct > .4 ,j=list(pct.approved = mean(approval,na.rm=T),pct.approved.bank = weighted.mean(approval,w=BANK,na.rm=T)),by='conf.bucket']

#for all lenders
ggplot(byBin[!is.na(conf.bucket)]) + geom_point(aes(x=as.numeric(as.character.factor(conf.bucket)),y=pct.approved),shape = 1,size=2) + theme_bw() + geom_vline(xintercept = 1,linetype = 'dashed') + scale_y_continuous(limits = c(0.7,0.85),labels = function(x) { paste0(100*x,'%')}) + scale_x_continuous(labels = function(x) {paste0(100*x,'%')}) + xlab(NULL) + ylab(NULL) 
ggsave('figure4a_approval_conforming.png',height=3,width=5,units = 'in')

##for banks only
ggplot(byBin[!is.na(conf.bucket)]) + geom_point(aes(x=as.numeric(as.character.factor(conf.bucket)),y=pct.approved.bank),shape = 1,size=2) + theme_bw() + geom_vline(xintercept = 1,linetype = 'dashed') + scale_y_continuous(limits = c(0.7,0.85),labels = function(x) { paste0(100*x,'%')}) + scale_x_continuous(labels = function(x) {paste0(100*x,'%')}) + xlab(NULL) + ylab(NULL)
ggsave('figure4a_approval_conforming_bank.png',height=3,width=5,units = 'in')

##for well capitalized banks only
full <- fread('~/Dropbox (CBS)/balance_sheet_final/data_intermediate/all_records07-17wcfm.csv')
avery <- data.table(read.dta13('~/Dropbox (CBS)/balance_sheet_final/data_intermediate/avery_panel.dta'))
balance.sheet <- fread('~/Dropbox (CBS)/balance_sheet_final/data_intermediate/balance_sheet.csv')

hmda <- full[loan_type == 1 & loan_purpose %in% c(1,3) & property_type == 1 & owner_occupancy == 1 & lien_status == 1]
hmda[TYPE %in% c(40,41) ,type := 'S']
hmda[TYPE %in% 10:14    ,type := 'B']
hmda<-hmda[type == 'B']

hmda[action_taken %in% c(3,7), approval := 0]
hmda[action_taken %in% c(1,2), approval := 1]
hmda <- hmda[!is.na(hmda$approval),]

hmda[, conf.pct := loan_amount / conforming_limit]
hmda[, jumbo := as.integer(conf.pct>1.005)]

temp <- avery[,c('year','respondent_id','agency_code','ENTITY')]

hmda <- merge(hmda, temp,by.x=c('year','respondent_id','agency_code'),by.y=c('year','respondent_id','agency_code'),all.x=F,all.y=F)
hmda[, rssdid := ENTITY] 
with.bs <- merge(hmda, balance.sheet, by.x=c('year','rssdid'),by.y=c('year','rssdid'), all.x=F, all.y=F)
with.bs[,as.cutoff := ave(total_assets,cut2(total_assets,g=2))]
with.bs[,cr.cutoff := quantile(cr,probs = .75,na.rm=T),by=c('year','as.cutoff')]
with.bs.high.cr <- with.bs[cr > cr.cutoff]
with.bs[,high.cr   := as.integer(cr > cr.cutoff)]

with.bs.high.cr[,conf.bucket := cut(conf.pct,breaks = c(0,seq(.5+.01,1.5+.01,by = 0.05)),labels = c(seq(.5,1.5,by = 0.05)))]

byBin <- with.bs.high.cr[year %in% 2007:2017 & conf.pct > .4 ,j=list(pct.approved = mean(approval,na.rm=T)),by='conf.bucket']

#for well-capitalized banks only
ggplot(byBin[!is.na(conf.bucket)]) + geom_point(aes(x=as.numeric(as.character.factor(conf.bucket)),y=pct.approved),shape = 1,size=2) + theme_bw() + geom_vline(xintercept = 1,linetype = 'dashed') + scale_y_continuous(limits = c(0.7,0.85),labels = function(x) { paste0(100*x,'%')}) + scale_x_continuous(labels = function(x) {paste0(100*x,'%')}) + xlab(NULL) + ylab(NULL)
ggsave('figure4a_approval_conforming_capitalized.png',height=3,width=5,units = 'in')


##for well-capitalized banks only: defined well-capitalized banks as cr gap>0
rm(list=ls())
full <- fread('~/Dropbox (CBS)/balance_sheet_final/data_intermediate/all_records07-17wcfm.csv')
avery <- data.table(read.dta13('~/Dropbox (CBS)/balance_sheet_final/data_intermediate/avery_panel.dta'))
hmda <- full[loan_type == 1 & loan_purpose %in% c(1,3) & property_type == 1 & owner_occupancy == 1 & lien_status == 1]
cr <- fread('~/Dropbox (CBS)/balance_sheet_final/data_intermediate/bankdata_conventional.csv')
temp <- avery[,c('year','respondent_id','agency_code','ENTITY')]
hmda <- merge(hmda, temp,by.x=c('year','respondent_id','agency_code'),by.y=c('year','respondent_id','agency_code'),all.x=F,all.y=F)
hmda[, rssdid := ENTITY] 
with.cr <- merge(hmda,cr[,c('rssdid','year','cr','total_assets','cr_gap'),with=F],by.x=c('rssdid','year'),by.y=c('rssdid','year'),all.x=T)

with.cr[,conf.pct := loan_amount / conforming_limit]
with.cr[action_taken %in% c(3,7), approval := 0]
with.cr[action_taken %in% c(1,2), approval := 1]

positive.cr <- with.cr[cr_gap>0,]
positive.cr[,conf.bucket := cut(conf.pct,breaks = c(0,seq(.5+.01,1.5+.01,by = 0.05)),labels = c(seq(.5,1.5,by = 0.05)))]


byBin <- positive.cr[conf.pct > .4 ,j=list(pct.approved = mean(approval,na.rm=T)),by='conf.bucket']
ggplot(byBin[!is.na(conf.bucket)]) + geom_point(aes(x=as.numeric(as.character.factor(conf.bucket)),y=pct.approved),shape = 1,size=2) + theme_bw() + geom_vline(xintercept = 1,linetype = 'dashed') + scale_y_continuous(limits = c(0.7,0.85),labels = function(x) { paste0(100*x,'%')}) + scale_x_continuous(labels = function(x) {paste0(100*x,'%')}) + xlab(NULL) + ylab(NULL)
ggsave('figure4a_approval_conforming_capitalized_crgap.png',height=3,width=5,units = 'in')